import pandas as pd
from lxml import etree
import requests, time, random,csv

def save_datas(datas):
    """Append a single row to ``hetao.csv`` in the current working directory.

    Args:
        datas: Sequence of cell values forming one CSV row. Values that are
            not strings are stringified by the ``csv`` module when written.
    """
    # newline='' hands line-ending control to the csv module; append mode
    # means repeated runs keep adding rows to the same file.
    with open('hetao.csv', 'a', encoding='utf-8', newline='') as out:
        csv.writer(out, delimiter=',').writerow(datas)

# NOTE(review): presumably meant to make requests retry failed connections
# 5 times. HTTPAdapter takes DEFAULT_RETRIES as a default argument value, so
# reassigning the module attribute afterwards may have no effect - TODO
# confirm; the documented route is HTTPAdapter(max_retries=...) mounted on a
# Session.
requests.adapters.DEFAULT_RETRIES = 5
# Input table of pages to crawl; the visible code only reads its
# `detail_urls` column. NOTE(review): absolute, machine-specific path.
df = pd.read_csv('/Users/k/Desktop/gogogo/project/xinjiangjianguo/urls/urls.csv')
# HTTP headers passed to requests.get for every page fetched by this script.
headers={
    'referer': 'https://s.taobao.com/',
    # Ask the server to close the connection after each response.
    'Connection': 'close',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36 Edg/89.0.774.57',
    # NOTE(review): hard-coded session cookie. Several older commented-out
    # cookies that used to sit here were dropped as dead code; credentials
    # like this should presumably come from an environment variable or an
    # untracked config file rather than live in source - confirm with owner.
    'cookie': 'hng=CN%7Czh-CN%7CCNY%7C156; dnk=tb110195426; uc1=cookie14=Uoe1iuWUMVFxmQ%3D%3D&cookie15=VT5L2FSpMGV7TQ%3D%3D&existShop=false&pas=0&cookie21=W5iHLLyFeYTE&cookie16=Vq8l%2BKCLySLZMFWHxqs8fwqnEw%3D%3D; uc3=id2=UUphzOZ2BWN0kAnMTw%3D%3D&vt3=F8dCuwpnkYqcgKU5z88%3D&nk2=F5REP7xKzSPr4sI%3D&lg2=U%2BGCWk%2F75gdr5Q%3D%3D; tracknick=tb110195426; lid=tb110195426; uc4=id4=0%40U2grF830mz57LphbYPL7ivBUtv9KApvR&nk4=0%40FY4PbaiVZ%2B1XimVGaiJ1zeLqPPmPNQ%3D%3D; lgc=tb110195426; cookie2=24b4a97bb47175dcf4c858fe9290ae84; sgcookie=E100DQqQqNQ71ykHjsIq0FRfUGVql%2BmkW2LWwOzHRvIZJJYcLxVbj0v%2FxrzmV4VUsIIUO5q958tNlKNAkkw664jXEg%3D%3D; t=26fc15c8d141bd4957f3e55f153e3877; csg=11f720cc; enc=ShSEzB6cVZ%2FnYlN52o2oEfwiLifE6Z%2BN7hbhiFYgUIwpsvOb3uRFSEz%2BfK9J%2Bopceb0ZeA5MOzBZQXSfrY4H2Zs0yxZcsXU4QzTw%2BaCPbvM%3D; _tb_token_=e78e0be3553d8; cna=RcaSGJ6JqSACAXAg8Sr+n1D1; xlly_s=1; pnm_cku822=098%23E1hv6QvUvbpvUvCkvvvvvjiWPLLZzjDRPLLy0jEUPmPUAjrUPLqhAjlUP25vQjr8RvhvCvvvphmUvpvVvpCmpJ2yuvhvmvvvpLDdsaeGKvhv8vvvpPWvvv2FvvCVVQvv9fUvvhNjvvvmjvvvBGwvvUjrvvCVVQvv9xIUvpCWvum0r3zhV8gcnkxb5ah65tkXah2ItCQ4HF%2BSBiVvVE01%2B2n79WpXjLeAnhjEKBmAdX368NLXrqpAhjCwD76XdiTOfvc6AC94e3O07pvCvvOv9hCvvvvRvpvhvv2MMTOCvvpvvhHh; _m_h5_tk=194a75f9b430e20358dd2842547dbf73_1618580640382; _m_h5_tk_enc=dd80f7d2f756f313b64dd22c5c54a3b7; tfstk=cpKhB7sxS1NfL-7MlDsBdyEd3T9hZkEPi3-6b3qWlkpBraxNiRaauUkSI6AH2z1..; l=eBIrpkJgjw1topT2BOfZnurza77OLIRXsuPzaNbMiOCPOF1A7Z8dW6as9VYJCnGVn6JXR3Rrt1BBByTgtyUi5lIeHwXn9MptVdBG.; isg=BGJiy10gdsQ9kWqYiemjnB7us-7Es2bNo349i6z70FWsfwP5lEC03UF1r7uDwN5l'
}
# Crawl the first 10 detail pages listed in `df.detail_urls`, extract every
# text node under <div class="attributes"> and append one CSV row per page
# through save_datas().
MAX_ATTEMPTS = 5       # give up on a URL after this many failed fetches
REQUEST_TIMEOUT = 30   # seconds; without a timeout requests.get can block forever
RETRY_DELAY = 20       # seconds to back off after a failed attempt

for i in df.detail_urls.tolist()[0:10]:
    # Random politeness delay in seconds, drawn once per URL.
    tim = random.uniform(12, 16)
    print(i, '请等待{}秒'.format(tim))
    for attempt in range(1, MAX_ATTEMPTS + 1):
        # Only fetching and parsing are retried; a failure while writing the
        # CSV is not a reason to hit the server again and is left to propagate.
        try:
            # NOTE(review): verify=False turns off TLS certificate checks while
            # a session cookie is being sent - confirm it is really required.
            res = requests.get(url=i, headers=headers, verify=False,
                               timeout=REQUEST_TIMEOUT)
            root = etree.HTML(res.text)
            if root is None:
                # Surface an unparsable/empty body as a clear error instead of
                # an AttributeError on the xpath call below.
                raise ValueError('empty or unparsable HTML document')
            contents = root.xpath('//div[@class="attributes"]//text()')
        except Exception as exc:
            # `Exception` rather than a bare except, so Ctrl-C and SystemExit
            # still stop the script; the real error is reported, not hidden.
            print('Fetch failed (attempt {}/{}): {!r}'.format(
                attempt, MAX_ATTEMPTS, exc))
            time.sleep(RETRY_DELAY)
            continue
        time.sleep(tim)
        # Drop whitespace-only nodes and remove non-breaking spaces, newlines
        # and tabs from the remaining ones.
        data = [x.strip().replace('\xa0', '').replace("\n", '').replace("\t", '') for x in contents if x.strip()]
        print(data)
        # The whole `data` list is written as one CSV cell (its str() form).
        save_datas([i, data])
        break
    else:
        # Every attempt failed: skip this URL instead of retrying forever.
        print('Giving up on {} after {} attempts'.format(i, MAX_ATTEMPTS))